import pandas as pd
import os
# Relative paths of the uploaded policy CSVs, one per state or territory,
# each following the "policy/<name>_policy.csv" naming pattern.
policy_files = [
    f"policy/{region}_policy.csv"
    for region in (
        "Alabama", "Alaska", "American Samoa", "Arizona", "Arkansas",
        "California", "Colorado", "Connecticut", "Delaware", "DistrictofColumbia",
        "Florida", "Georgia", "Guam", "Hawaii", "Idaho",
        "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky",
        "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan",
        "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska",
        "Nevada", "New Hampshire", "New Jersey", "New Mexico", "New York",
        "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
        "Pennsylvania", "Puerto Rico", "Rhode Island", "South Carolina", "South Dakota",
        "Tennessee", "Texas", "Utah", "Vermont", "Virgin Islands",
        "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming",
    )
]
This cell loads each CSV file into its own DataFrame and stores the DataFrames in a dictionary keyed by state name. It also prints the shape and column names of each DataFrame to check that the files are consistent.
# Load all files into a dictionary of dataframes. Each key is the state name
# taken from the file name: the part of the base name before the first "_".
state_policies = {
    os.path.basename(file).split('_')[0]: pd.read_csv(file)
    for file in policy_files
}

# Print the shape and column names of each dataframe to inspect consistency.
# NOTE(review): the output recorded with this notebook shows two files that
# deviate from the common 14-column layout -- Kentucky lists
# 'Opening (County)' and has no 'Opening (State)', and Maryland carries an
# extra 'Unnamed: 14' column. Confirm whether these should be normalized.
for state, df in state_policies.items():
    print(f"{state}: {df.shape}, Columns: {df.columns}")
Alabama: (84, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Alaska: (65, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
American Samoa: (35, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Arizona: (159, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Arkansas: (92, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
California: (200, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Colorado: (258, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Connecticut: (284, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Delaware: (155, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
DistrictofColumbia: (99, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Florida: (86, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Georgia: (129, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Guam: (156, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Hawaii: (79, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Idaho: (82, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Illinois: (120, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Indiana: (156, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Iowa: (84, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Kansas: (116, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Kentucky: (130, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Deferring to County',
'Opening (County)', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Louisiana: (81, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Maine: (115, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Maryland: (135, 15), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine', 'Unnamed: 14'],
dtype='object')
Massachusetts: (161, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Michigan: (200, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Minnesota: (140, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Mississippi: (57, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Missouri: (102, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Montana: (69, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Nebraska: (89, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Nevada: (78, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
New Hampshire: (105, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
New Jersey: (257, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
New Mexico: (139, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
New York: (406, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
North Carolina: (146, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
North Dakota: (87, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Ohio: (194, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Oklahoma: (70, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Oregon: (142, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Pennsylvania: (212, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Puerto Rico: (63, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Rhode Island: (149, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
South Carolina: (40, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
South Dakota: (66, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Tennessee: (113, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Texas: (223, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Utah: (88, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Vermont: (105, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Virgin Islands: (116, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Virginia: (128, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Washington: (277, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
West Virginia: (142, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Wisconsin: (130, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
Wyoming: (83, 14), Columns: Index(['date', 'policy', 'Restrict/Close', 'Opening (State)',
'Deferring to County', 'Testing', 'Education', 'Health/Medical',
'Emergency Level', 'Transportation', 'Budget', 'Social Distancing',
'Other', 'Vaccine'],
dtype='object')
This block combines the individual state DataFrames into a single DataFrame: it iterates over the dictionary of DataFrames, adds a State column to each one, and concatenates them. It then counts the policies recorded per state per month and plots those counts as a heatmap.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Create a dataframe to accumulate all policy data with an additional state column.
tagged_frames = []
for state, df in state_policies.items():
    # Tagged in place, so the frames held in state_policies gain the column too.
    df['State'] = state
    tagged_frames.append(df)
# Concatenate once: calling pd.concat inside the loop re-copies every row
# accumulated so far on each iteration.
all_policies = pd.concat(tagged_frames, ignore_index=True)

# Convert date column to datetime format for easier handling.
# NOTE(review): the recorded run emitted a "Could not infer format" warning
# here, so the date strings are not in one uniform format. Values that fail
# to parse become NaT (errors='coerce') and then fall out of the monthly
# counts below, since groupby excludes missing keys by default -- confirm the
# expected format and pass it explicitly via `format=`.
all_policies['date'] = pd.to_datetime(all_policies['date'], errors='coerce')

# Group by month and state and count the number of policies
# (rows = months, columns = states, missing combinations filled with 0).
policy_counts = all_policies.groupby(
    [all_policies['date'].dt.to_period('M'), 'State']
).size().unstack(fill_value=0)

# Convert the period index back to datetime for better handling downstream.
policy_counts.index = policy_counts.index.to_timestamp()

# Creating a heatmap with states on the y-axis and months on the x-axis.
# The month labels are rendered as short "Mon-YYYY" text on a relabelled copy,
# because datetime labels would be drawn as full timestamps; policy_counts
# itself keeps its datetime index.
heatmap_data = policy_counts.set_axis(
    policy_counts.index.strftime('%b-%Y'), axis=0
).T
plt.figure(figsize=(20, 15))
sns.heatmap(heatmap_data, cmap='viridis', linewidths=.5, linecolor='gray', annot=False)
plt.title('Heatmap of Policy Implementation Over Time by State')
plt.xlabel('Month-Year')
plt.ylabel('State')
plt.xticks(rotation=45)
plt.show()
C:\Users\varsh\AppData\Local\Temp\ipykernel_14916\3405106654.py:2: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format. all_policies['date'] = pd.to_datetime(all_policies['date'], errors='coerce')
Box plot showing the distribution of monthly policy counts for each state
import plotly.express as px
# Count the policies recorded for every (state, month) pair; the result has
# one row per pair with the tally in the 'counts' column.
month_of_policy = all_policies['date'].dt.to_period('M')
monthly_sizes = all_policies.groupby(['State', month_of_policy]).size()
policy_counts_by_state = monthly_sizes.reset_index(name='counts')

# One box per state, summarizing that state's monthly counts.
plt.figure(figsize=(20, 10))
box_ax = sns.boxplot(data=policy_counts_by_state, x='State', y='counts')
plt.xticks(rotation=90)  # state names are long; turn them vertical
box_ax.set_title('Distribution of Policy Counts by State')
box_ax.set_xlabel('State')
box_ax.set_ylabel('Policy Counts')
box_ax.grid(True)
plt.show()
C:\ProgramData\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\ProgramData\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector): C:\ProgramData\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead if pd.api.types.is_categorical_dtype(vector):
import plotly.express as px
# Tally the rows for every (State, policy) pair into a 'counts' column.
state_policy_sizes = all_policies.groupby(['State', 'policy']).size()
treemap_data = state_policy_sizes.reset_index(name='counts')

# Nested treemap: states at the top level, their policies inside; both tile
# size and tile colour are driven by 'counts'.
fig = px.treemap(
    treemap_data,
    path=['State', 'policy'],
    values='counts',
    color='counts',
    color_continuous_scale='Blues',
    title='Treemap of Policy Implementation by State and Type',
)
fig.show()
We have examined and visualized policy data across U.S. states and territories. Aggregating the policy data into a single DataFrame enables analysis of how policies are implemented over time and by location.
Key considerations are: The heatmap clearly shows how policy implementations fluctuate over time, helping identify periods of high activity. The boxplot reveals the variability in the number of policies across states, with some states showing more consistent policy activity than others. The treemap provides a granular view of policies by type within each state, highlighting the focus areas of policy enforcement or introduction.